# load libraries
library(Cairo)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
library(viridis)
## Loading required package: viridisLite
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(ggrepel)
library(ggpubr)

gene_class_map <- c(
  clingen_hi = "Haploinsufficient",
  autosomal_dominant = "Autosomal dominant", 
  autosomal_recessive = "Autosomal recessive", 
  depmap_essential = "DepMap essential", 
  mouse_essential = "Mouse essential",
  oncokb_tumor_supressor = "Tumor supressor gene",
  oncokb_oncogene = "Oncogene", 
  olfactory = "Olfactory"
)

satmut_promoters_gene_metadata <- as_tibble(fread("../../results/satmut_promoters_preprocess/satmut_promoters_gene_metadata/satmut_promoters_gene_metadata.txt.gz"))

satmut_promoters_pred_final_prom_level <- as_tibble(fread("../../results/satmut_promoters_preprocess/satmut_promoters_final_prom/satmut_promoters_final_prom_all.txt.gz"))

satmut_promoters_pred_final_prom_level <- satmut_promoters_pred_final_prom_level %>% 
  left_join(satmut_promoters_gene_metadata) %>% 
  filter(analysis_version %in% c("promoter_250bp", "shuffled_promoter_250bp")) 
## Joining with `by = join_by(id, strand)`
satmut_promoters_pred_final_prom_level <- satmut_promoters_pred_final_prom_level %>% 
  mutate(
    correlation_mam241_vs_avgKHS_rho_padj_abs = p.adjust(correlation_mam241_vs_avgKHS_rho_pval_abs, method = "fdr"), 
    correlation_mam241_vs_avgKHS_rho_padj_pos = p.adjust(correlation_mam241_vs_avgKHS_rho_pval_pos, method = "fdr"), 
    correlation_mam241_vs_avgKHS_rho_padj_neg = p.adjust(correlation_mam241_vs_avgKHS_rho_pval_neg, method = "fdr"), 
  )

satmut_promoters_pred_final_prom_level_null <- satmut_promoters_pred_final_prom_level %>% 
  filter(analysis_version == "shuffled_promoter_250bp")

satmut_promoters_pred_final_prom_level_real <- satmut_promoters_pred_final_prom_level %>% 
  filter(analysis_version == "promoter_250bp")

threshold_abs <- satmut_promoters_pred_final_prom_level_null$correlation_mam241_vs_avgKHS_rho_est_abs %>% quantile(0.999, na.rm=T)
threshold_pos <- satmut_promoters_pred_final_prom_level_null$correlation_mam241_vs_avgKHS_rho_est_pos %>% quantile(0.999, na.rm=T)
threshold_neg <- satmut_promoters_pred_final_prom_level_null$correlation_mam241_vs_avgKHS_rho_est_neg %>% quantile(0.001, na.rm=T)
threshold_fdr <- 0.01

satmut_promoters_pred_final_prom_level <- satmut_promoters_pred_final_prom_level %>% 
  mutate(sig_pos_0.999_neg_0.001 = 
    ifelse(((correlation_mam241_vs_avgKHS_rho_padj_pos < threshold_fdr) & (correlation_mam241_vs_avgKHS_rho_est_pos > threshold_pos)) & 
     ((correlation_mam241_vs_avgKHS_rho_padj_neg < threshold_fdr) & (correlation_mam241_vs_avgKHS_rho_est_neg < threshold_neg)), "both", 
    ifelse((correlation_mam241_vs_avgKHS_rho_padj_pos < threshold_fdr) & (correlation_mam241_vs_avgKHS_rho_est_pos > threshold_pos), "pos", 
      ifelse((correlation_mam241_vs_avgKHS_rho_padj_neg < threshold_fdr) & (correlation_mam241_vs_avgKHS_rho_est_neg < threshold_neg), "neg", "nonsig"))))
table(satmut_promoters_pred_final_prom_level_real$sig_pos_0.999_neg_0.001)
## Warning: Unknown or uninitialised column: `sig_pos_0.999_neg_0.001`.
## < table of extent 0 >
write_tsv(satmut_promoters_pred_final_prom_level, gzfile("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level.txt.gz"))

satmut_promoters_pred_final_prom_level_null <- satmut_promoters_pred_final_prom_level %>% 
  filter(analysis_version == "shuffled_promoter_250bp")
write_tsv(satmut_promoters_pred_final_prom_level_null, gzfile("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_null.txt.gz"))

satmut_promoters_pred_final_prom_level_real <- satmut_promoters_pred_final_prom_level %>% 
  filter(analysis_version == "promoter_250bp")
write_tsv(satmut_promoters_pred_final_prom_level_real, gzfile("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real.txt.gz"))
table(satmut_promoters_pred_final_prom_level_real$sig_0.999_abs)
## Warning: Unknown or uninitialised column: `sig_0.999_abs`.
## < table of extent 0 >
table(satmut_promoters_pred_final_prom_level_real$sig_0.999_abs)/
  sum(table(satmut_promoters_pred_final_prom_level_real$sig_0.999_abs))
## Warning: Unknown or uninitialised column: `sig_0.999_abs`.
## Unknown or uninitialised column: `sig_0.999_abs`.
## numeric(0)
p <- ggdensity(satmut_promoters_pred_final_prom_level, 
  x = "correlation_mam241_vs_avgKHS_rho_est_abs",
   add = "none", rug = FALSE, 
   color = "analysis_version", fill = "analysis_version",
   palette = c("#66c2a5", "grey80")  #fc8d62
  ) + 
  geom_vline(
    aes(xintercept = threshold_abs),
    color = "#66c2a5", 
    linetype = "dashed"
  ) + 
  scale_color_manual(
    values = c("promoter_250bp" = "#66c2a5", "shuffled_promoter_250bp" = "grey80"),  #fc8d62
    labels = c("promoter_250bp" = "Real", "shuffled_promoter_250bp" = "Shuffled"),
    name = "Promoter"
  ) +
  scale_fill_manual(
    values = c("promoter_250bp" = "#66c2a5", "shuffled_promoter_250bp" = "grey80"),  #fc8d62
    labels = c("promoter_250bp" = "Real", "shuffled_promoter_250bp" = "Shuffled"),
    name = "Promoter"
  ) +
  labs(x = "Spearman's ρ", y = "Density") +
  theme(
    panel.grid.major.x = element_blank(),   # Remove vertical major grid lines
    panel.grid.minor.x = element_blank(),   # Remove vertical minor grid lines
    panel.grid.major.y = element_line(color = "grey95"), # Customize horizontal grid lines
    panel.grid.minor.y = element_line(color = "grey95"), # Optional: lighter horizontal minor grid lines
    axis.line.y = element_blank()          # Remove the y-axis line
  )
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
d <- ggplot_build(p)$data[[1]] %>% 
  filter(colour == "#66c2a5")
## Warning: Removed 606 rows containing non-finite outside the scale range
## (`stat_density()`).
p + geom_area(data = subset(d, x > threshold_abs), aes(x=x, y=y), fill="#66c2a5", alpha = 0.7)
## Warning: Removed 606 rows containing non-finite outside the scale range
## (`stat_density()`).

ggsave("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_univariate_abs.pdf", scale=0.6, device = cairo_pdf)
## Saving 4.2 x 3 in image
## Warning: Removed 606 rows containing non-finite outside the scale range
## (`stat_density()`).
table(satmut_promoters_pred_final_prom_level_real$sig_0.999_pos)
## Warning: Unknown or uninitialised column: `sig_0.999_pos`.
## < table of extent 0 >
table(satmut_promoters_pred_final_prom_level_real$sig_0.999_pos)/
  sum(table(satmut_promoters_pred_final_prom_level_real$sig_0.999_pos))
## Warning: Unknown or uninitialised column: `sig_0.999_pos`.
## Unknown or uninitialised column: `sig_0.999_pos`.
## numeric(0)
p <- ggdensity(satmut_promoters_pred_final_prom_level, 
  x = "correlation_mam241_vs_avgKHS_rho_est_pos",
   add = "none", rug = FALSE, 
   color = "analysis_version", fill = "analysis_version",
   palette = c("#8da0cb", "grey80")  #fc8d62
  ) + 
  geom_vline(
    aes(xintercept = threshold_pos),
    color = "#8da0cb", 
    linetype = "dashed"
  ) + 
  scale_color_manual(
    values = c("promoter_250bp" = "#8da0cb", "shuffled_promoter_250bp" = "grey80"),  #fc8d62
    labels = c("promoter_250bp" = "Real", "shuffled_promoter_250bp" = "Shuffled"),
    name = "Promoter"
  ) +
  scale_fill_manual(
    values = c("promoter_250bp" = "#8da0cb", "shuffled_promoter_250bp" = "grey80"),  #fc8d62
    labels = c("promoter_250bp" = "Real", "shuffled_promoter_250bp" = "Shuffled"),
    name = "Promoter"
  ) +
  labs(x = "Spearman's ρ", y = "Density") +
  theme(
    aspect.ratio = 1/5, 
    panel.grid.major.x = element_blank(),   # Remove vertical major grid lines
    panel.grid.minor.x = element_blank(),   # Remove vertical minor grid lines
    panel.grid.major.y = element_line(color = "grey95"), # Customize horizontal grid lines
    panel.grid.minor.y = element_line(color = "grey95"), # Optional: lighter horizontal minor grid lines
    axis.line.y = element_blank()          # Remove the y-axis line
  )
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
d <- ggplot_build(p)$data[[1]] %>% 
  filter(colour == "#8da0cb")
## Warning: Removed 1814 rows containing non-finite outside the scale range
## (`stat_density()`).
p + geom_area(data = subset(d, x > threshold_pos), aes(x=x, y=y), fill="#8da0cb", alpha = 0.7) + 
  theme(aspect.ratio=1/3)  # + coord_flip()
## Warning: Removed 1814 rows containing non-finite outside the scale range
## (`stat_density()`).

ggsave("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_univariate_pos.pdf", scale=0.6, device = cairo_pdf)
## Saving 4.2 x 3 in image
## Warning: Removed 1814 rows containing non-finite outside the scale range
## (`stat_density()`).
table(satmut_promoters_pred_final_prom_level_real$sig_0.999_neg)
## Warning: Unknown or uninitialised column: `sig_0.999_neg`.
## < table of extent 0 >
table(satmut_promoters_pred_final_prom_level_real$sig_0.999_neg)/
  sum(table(satmut_promoters_pred_final_prom_level_real$sig_0.999_neg))
## Warning: Unknown or uninitialised column: `sig_0.999_neg`.
## Unknown or uninitialised column: `sig_0.999_neg`.
## numeric(0)
p <- ggdensity(satmut_promoters_pred_final_prom_level, 
  x = "correlation_mam241_vs_avgKHS_rho_est_neg",
   add = "none", rug = FALSE, 
   color = "analysis_version", fill = "analysis_version",
   palette = c("#fc8d62", "grey80")  #fc8d62
  ) + 
  geom_vline(
    aes(xintercept = threshold_neg),
    color = "#fc8d62", 
    linetype = "dashed"
  ) + 
  scale_color_manual(
    values = c("promoter_250bp" = "#fc8d62", "shuffled_promoter_250bp" = "grey80"),  #fc8d62
    labels = c("promoter_250bp" = "Real", "shuffled_promoter_250bp" = "Shuffled"),
    name = "Promoter"
  ) +
  scale_fill_manual(
    values = c("promoter_250bp" = "#fc8d62", "shuffled_promoter_250bp" = "grey80"),  #fc8d62
    labels = c("promoter_250bp" = "Real", "shuffled_promoter_250bp" = "Shuffled"),
    name = "Promoter"
  ) +
  labs(x = "Spearman's ρ", y = "Density") +
  theme(
    aspect.ratio = 1/5, 
    panel.grid.major.x = element_blank(),   # Remove vertical major grid lines
    panel.grid.minor.x = element_blank(),   # Remove vertical minor grid lines
    panel.grid.major.y = element_line(color = "grey95"), # Customize horizontal grid lines
    panel.grid.minor.y = element_line(color = "grey95"), # Optional: lighter horizontal minor grid lines
    axis.line.y = element_blank()          # Remove the y-axis line
  )
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
d <- ggplot_build(p)$data[[1]] %>% 
  filter(colour == "#fc8d62")
## Warning: Removed 1244 rows containing non-finite outside the scale range
## (`stat_density()`).
p + geom_area(data = subset(d, x < threshold_neg), aes(x=x, y=y), fill="#fc8d62", alpha = 0.7) + 
  theme(aspect.ratio=1/3)
## Warning: Removed 1244 rows containing non-finite outside the scale range
## (`stat_density()`).

ggsave("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_univariate_neg.pdf", scale=0.6, device = cairo_pdf)
## Saving 4.2 x 3 in image
## Warning: Removed 1244 rows containing non-finite outside the scale range
## (`stat_density()`).
top_select_genes <- satmut_promoters_pred_final_prom_level_real %>% 
  filter(sig_pos_0.999_neg_0.001 != "nonsig") %>% 
  mutate(correlation_mam241_vs_avgKHS_rho_est_pos_temp = ifelse(sig_pos_0.999_neg_0.001 == "neg", 0, correlation_mam241_vs_avgKHS_rho_est_pos)) %>% 
  mutate(correlation_mam241_vs_avgKHS_rho_est_neg_temp = ifelse(sig_pos_0.999_neg_0.001 == "pos", 0, correlation_mam241_vs_avgKHS_rho_est_neg)) %>% 
  arrange(desc(correlation_mam241_vs_avgKHS_rho_est_pos_temp - correlation_mam241_vs_avgKHS_rho_est_neg_temp)) %>% 
  dplyr::select(-correlation_mam241_vs_avgKHS_rho_est_pos_temp, -correlation_mam241_vs_avgKHS_rho_est_neg_temp) %>% 
  group_by(sig_pos_0.999_neg_0.001) %>% 
  slice(1:10) %>% 
  ungroup()
write_tsv(top_select_genes, "../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_bivariate_pos_neg_top_select_genes.txt")

top_ordered_genes <- satmut_promoters_pred_final_prom_level_real %>% 
  mutate(correlation_mam241_vs_avgKHS_rho_est_pos_temp = ifelse(sig_pos_0.999_neg_0.001 == "neg", 0, correlation_mam241_vs_avgKHS_rho_est_pos)) %>% 
  mutate(correlation_mam241_vs_avgKHS_rho_est_neg_temp = ifelse(sig_pos_0.999_neg_0.001 == "pos", 0, correlation_mam241_vs_avgKHS_rho_est_neg)) %>% 
  arrange(desc(correlation_mam241_vs_avgKHS_rho_est_pos_temp - correlation_mam241_vs_avgKHS_rho_est_neg_temp)) %>% 
  dplyr::select(-correlation_mam241_vs_avgKHS_rho_est_pos_temp, -correlation_mam241_vs_avgKHS_rho_est_neg_temp) %>% 
  group_by(sig_pos_0.999_neg_0.001) %>% 
  ungroup()
write_tsv(top_ordered_genes, gzfile("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_bivariate_pos_neg_top_ordered_genes.txt.gz"))

top_ordered_genes_both <- filter(top_ordered_genes, sig_pos_0.999_neg_0.001 == "both")$gene_name
top_ordered_genes_neg <- filter(top_ordered_genes, sig_pos_0.999_neg_0.001 == "neg")$gene_name
top_ordered_genes_pos <- filter(top_ordered_genes, sig_pos_0.999_neg_0.001 == "pos")$gene_name
top_ordered_genes_nonsig <- filter(top_ordered_genes, sig_pos_0.999_neg_0.001 == "nonsig")$gene_name
top_ordered_genes_background <- filter(top_ordered_genes, !is.na(correlation_mam241_vs_avgKHS_rho_est_pos) | !is.na(correlation_mam241_vs_avgKHS_rho_est_neg))$gene_name
write(top_ordered_genes_both, "../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_bivariate_pos_neg_top_genes_both.txt")
write(top_ordered_genes_neg, "../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_bivariate_pos_neg_top_genes_neg.txt")
write(top_ordered_genes_pos, "../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_bivariate_pos_neg_top_genes_pos.txt")
write(top_ordered_genes_nonsig, "../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_bivariate_pos_neg_top_genes_nonsig.txt")
write(top_ordered_genes_background, "../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_bivariate_pos_neg_top_genes_background.txt")

genes_to_highlight <- c("LDLR", "ARNT", "EMILIN2", "CCR10")  # , top_ordered_genes_nonsig[1])

ggscatter(satmut_promoters_pred_final_prom_level_real, 
  x = "correlation_mam241_vs_avgKHS_rho_est_neg", 
  y = "correlation_mam241_vs_avgKHS_rho_est_pos", 
  color = "sig_pos_0.999_neg_0.001", fill = "sig_pos_0.999_neg_0.001",
  palette = c("#984ea3", "#fc8d62", "#e3e1ef", "#8da0cb"), 
  alpha = 0.5
) + 
  xlab("Spearman's ρ (max(0, phyloP), allelic skew-)") + 
  ylab("Spearman's ρ (max(0, phyloP), allelic skew+)") + 
  theme(aspect.ratio = 1) + 
  geom_point(aes(
    x = correlation_mam241_vs_avgKHS_rho_est_neg, 
    y = correlation_mam241_vs_avgKHS_rho_est_pos, 
  ), color = "black", fill = "black",  alpha = 0.5, 
  data = filter(satmut_promoters_pred_final_prom_level_real, gene_name %in% genes_to_highlight)) + 
  geom_text_repel(aes(
    x = correlation_mam241_vs_avgKHS_rho_est_neg, 
    y = correlation_mam241_vs_avgKHS_rho_est_pos, 
    label = gene_name, 
  ), 
  data = filter(satmut_promoters_pred_final_prom_level_real, gene_name %in% genes_to_highlight), 
  max.overlaps = Inf)
## Warning: Removed 963 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_bivariate_pos_neg_v2.pdf", device = cairo_pdf)
## Saving 7 x 5 in image
## Warning: Removed 963 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggviolin(
  filter(satmut_promoters_pred_final_prom_level_real, !is.na(sig_pos_0.999_neg_0.001)), 
  x = "sig_pos_0.999_neg_0.001", y = "s_het",
  fill = "sig_pos_0.999_neg_0.001", 
  palette = c("#e3e1ef", "#fc8d62", "#984ea3", "#8da0cb"), 
  draw_quantiles = c(0.5)
  ) + 
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.10))) + 
  geom_pwc(
    method = "wilcox_test", label = "p.format", 
    hide.ns = TRUE
  ) + 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1), axis.line.x = element_blank()) + 
  theme(
    panel.grid.major.x = element_blank(),   # Remove vertical major grid lines
    panel.grid.minor.x = element_blank(),   # Remove vertical minor grid lines
    panel.grid.major.y = element_line(color = "grey90"), # Customize horizontal grid lines
    panel.grid.minor.y = element_blank(), # Optional: lighter horizontal minor grid lines
    axis.line.y = element_blank()          # Remove the y-axis line
  ) + 
  theme(aspect.ratio = 1.6) + 
  labs(y = "s_het", x = "")
## Warning: Removed 1219 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1219 rows containing non-finite outside the scale range
## (`stat_pwc()`).

ggsave("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_rho_by_genic_s_het.pdf", scale = 0.7, device = cairo_pdf)
## Saving 4.9 x 3.5 in image
## Warning: Removed 1219 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Removed 1219 rows containing non-finite outside the scale range (`stat_pwc()`).
ggviolin(
  filter(satmut_promoters_pred_final_prom_level_real, !is.na(sig_pos_0.999_neg_0.001)), 
  x = "sig_pos_0.999_neg_0.001", y = "loeuf",
  fill = "sig_pos_0.999_neg_0.001", 
  palette = c("#e3e1ef", "#fc8d62", "#984ea3", "#8da0cb"), 
  draw_quantiles = c(0.5)
  ) + 
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.10))) + 
  geom_pwc(
    method = "wilcox_test", label = "p.format", 
    hide.ns = TRUE
  ) + 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1), axis.line.x = element_blank()) + 
  theme(
    panel.grid.major.x = element_blank(),   # Remove vertical major grid lines
    panel.grid.minor.x = element_blank(),   # Remove vertical minor grid lines
    panel.grid.major.y = element_line(color = "grey90"), # Customize horizontal grid lines
    panel.grid.minor.y = element_blank(), # Optional: lighter horizontal minor grid lines
    axis.line.y = element_blank()          # Remove the y-axis line
  ) + 
  theme(aspect.ratio = 1.6) + 
  labs(y = "LOEUF", x = "")
## Warning: Removed 1269 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1269 rows containing non-finite outside the scale range
## (`stat_pwc()`).

ggsave("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_rho_by_genic_loeuf.pdf", scale = 0.7, device = cairo_pdf)
## Saving 4.9 x 3.5 in image
## Warning: Removed 1269 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Removed 1269 rows containing non-finite outside the scale range (`stat_pwc()`).
ggviolin(
  filter(satmut_promoters_pred_final_prom_level_real, !is.na(sig_pos_0.999_neg_0.001)), 
  x = "sig_pos_0.999_neg_0.001", y = "moeuf",
  fill = "sig_pos_0.999_neg_0.001", 
  palette = c("#e3e1ef", "#fc8d62", "#984ea3", "#8da0cb"), 
  draw_quantiles = c(0.5)
  ) + 
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.10))) + 
  geom_pwc(
    method = "wilcox_test", label = "p.format", 
    hide.ns = TRUE
  ) + 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1), axis.line.x = element_blank()) + 
  theme(
    panel.grid.major.x = element_blank(),   # Remove vertical major grid lines
    panel.grid.minor.x = element_blank(),   # Remove vertical minor grid lines
    panel.grid.major.y = element_line(color = "grey90"), # Customize horizontal grid lines
    panel.grid.minor.y = element_blank(), # Optional: lighter horizontal minor grid lines
    axis.line.y = element_blank()          # Remove the y-axis line
  ) + 
  theme(aspect.ratio = 1.6) + 
  labs(y = "MOEUF", x = "")
## Warning: Removed 1127 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 1127 rows containing non-finite outside the scale range
## (`stat_pwc()`).

ggsave("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_rho_by_genic_moeuf.pdf", scale = 0.7, device = cairo_pdf)
## Saving 4.9 x 3.5 in image
## Warning: Removed 1127 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Removed 1127 rows containing non-finite outside the scale range (`stat_pwc()`).
ggviolin(
  filter(satmut_promoters_pred_final_prom_level_real, !is.na(sig_pos_0.999_neg_0.001)), 
  x = "sig_pos_0.999_neg_0.001", y = "alphmis",
  fill = "sig_pos_0.999_neg_0.001", 
  palette = c("#e3e1ef", "#fc8d62", "#984ea3", "#8da0cb"), 
  draw_quantiles = c(0.5)
  ) + 
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.10))) + 
  geom_pwc(
    method = "wilcox_test", label = "p.format", 
    hide.ns = TRUE
  ) + 
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1), axis.line.x = element_blank()) + 
  theme(
    panel.grid.major.x = element_blank(),   # Remove vertical major grid lines
    panel.grid.minor.x = element_blank(),   # Remove vertical minor grid lines
    panel.grid.major.y = element_line(color = "grey90"), # Customize horizontal grid lines
    panel.grid.minor.y = element_blank(), # Optional: lighter horizontal minor grid lines
    axis.line.y = element_blank()          # Remove the y-axis line
  ) + 
  theme(aspect.ratio = 1.6) + 
  labs(y = "AlphaMissense", x = "")
## Warning: Removed 3144 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Warning: Removed 3144 rows containing non-finite outside the scale range
## (`stat_pwc()`).

ggsave("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_rho_by_genic_alphmis.pdf", scale = 0.7, device = cairo_pdf)
## Saving 4.9 x 3.5 in image
## Warning: Removed 3144 rows containing non-finite outside the scale range
## (`stat_ydensity()`).
## Removed 3144 rows containing non-finite outside the scale range (`stat_pwc()`).
ggscatter(satmut_promoters_pred_final_prom_level_real,
  x = "logTPM_K562", 
  y = "K562_ref_pred_prom", 
  alpha = 0.1, 
  color = "#47928B"
) + 
  xlab(expression(log[2] ~ "TPM K562")) +
  ylab("K562 activity (250bp mean)") +
  theme(
    panel.grid.major.x = element_line(color = "grey90"),   # Remove vertical major grid lines
    panel.grid.major.y = element_line(color = "grey90"), # Customize horizontal grid lines
  ) + 
  theme(aspect.ratio = 1) + 
  stat_cor(method = "pearson", 
           cor.coef.name = "r", 
           label.x = Inf, label.y = Inf, 
           hjust = 1, vjust = 1)
## Warning: Removed 3 rows containing non-finite outside the scale range
## (`stat_cor()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_real_vs_pred_K562_expr.pdf", scale = 0.6, device = cairo_pdf)
## Saving 4.2 x 3 in image
## Warning: Removed 3 rows containing non-finite outside the scale range (`stat_cor()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggscatter(satmut_promoters_pred_final_prom_level_real,
  x = "logTPM_HepG2", 
  y = "HepG2_ref_pred_prom", 
  alpha = 0.1, 
  color = "#DBAE65"
) + 
  xlab(expression(log[2] ~ "TPM HepG2")) +
  ylab("HepG2 activity (250bp mean)") +
  theme(
    panel.grid.major.x = element_line(color = "grey90"),   # Remove vertical major grid lines
    panel.grid.major.y = element_line(color = "grey90"), # Customize horizontal grid lines
  ) + 
  theme(aspect.ratio = 1) + 
  stat_cor(method = "pearson", 
           cor.coef.name = "r", 
           label.x = Inf, label.y = Inf, 
           hjust = 1, vjust = 1)
## Warning: Removed 3 rows containing non-finite outside the scale range (`stat_cor()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_real_vs_pred_HepG2_expr.pdf", scale = 0.6, device = cairo_pdf)
## Saving 4.2 x 3 in image
## Warning: Removed 3 rows containing non-finite outside the scale range (`stat_cor()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggscatter(satmut_promoters_pred_final_prom_level_real,
  x = "logTPM_SKNSH", 
  y = "SKNSH_ref_pred_prom", 
  alpha = 0.1, 
  color = "#C44543"
) + 
  xlab(expression(log[2] ~ "TPM SKNSH")) +
  ylab("SKNSH activity (250bp mean)") +
  theme(
    panel.grid.major.x = element_line(color = "grey90"),   # Remove vertical major grid lines
    panel.grid.major.y = element_line(color = "grey90"), # Customize horizontal grid lines
  ) + 
  theme(aspect.ratio = 1) + 
  stat_cor(method = "pearson", 
           cor.coef.name = "r", 
           label.x = Inf, label.y = Inf, 
           hjust = 1, vjust = 1)
## Warning: Removed 3 rows containing non-finite outside the scale range (`stat_cor()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggsave("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_real_vs_pred_SKNSH_expr.pdf", scale = 0.6, device = cairo_pdf)
## Saving 4.2 x 3 in image
## Warning: Removed 3 rows containing non-finite outside the scale range (`stat_cor()`).
## Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
ggdensity(satmut_promoters_pred_final_prom_level_real %>% filter(!is.na(sig_pos_0.999_neg_0.001)), 
  x = "avgKHS_emVar_neg_prom", 
  alpha = 0.2, 
  color = "sig_pos_0.999_neg_0.001", 
  palette = c("#984ea3", "#fc8d62", "#e3e1ef", "#8da0cb")
) + 
  labs(x = "Number of negative emVars", y = "Density", color = "") + 
  theme(aspect.ratio = 1/2) + 
  xlim(c(0, 160))

ggsave("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_cell_emVar_neg_prom.pdf", device = cairo_pdf)
## Saving 7 x 5 in image
ggdensity(satmut_promoters_pred_final_prom_level_real %>% filter(!is.na(sig_pos_0.999_neg_0.001)), 
  x = "avgKHS_emVar_pos_prom", 
  alpha = 0.2, 
  color = "sig_pos_0.999_neg_0.001", 
  palette = c("#984ea3", "#fc8d62", "#e3e1ef", "#8da0cb")
) + 
  labs(x = "Number of positive emVars", y = "Density", color = "") + 
  theme(aspect.ratio = 1/2) + 
  xlim(c(0, 160))

ggsave("../../results/satmut_promoters_analysis/satmut_promoters_overall_promoters/satmut_promoters_pred_final_prom_level_real_cell_emVar_pos_prom.pdf", device = cairo_pdf)
## Saving 7 x 5 in image